###########################
# 3. Conduct analysis of multi-verse results
###########################

library(dplyr)
library(matrixStats)
library(Hmisc)
library(tidyr)
library(ggplot2)
library(patchwork)

load('multiverse.RData')

###

# 1. Clean multiverse results


df_clean <- df_results[grep("std_", df_results$term), ]
df_clean <- df_clean[df_clean$temporal_control != 'trend',]

# Add 'party_level' column based on the conditions
df_clean <- df_clean %>%
  mutate(
    party_level = case_when(
      grepl("_operational", term) ~ "electorate-operational",
      grepl("_symbolic", term) ~ "electorate-symbolic",
      grepl("state", term) ~ "state",
      TRUE ~ "federal"
    ),
    # Add 'change_type' column based on the conditions
    change_type = ifelse(grepl("_party", term), "did", "can"),
    se_type = ifelse(complete.cases(cluster_robust_se), 'Cluster-robust', 'Regular'),
    threshold = as.character(n_threshold),
    std.error = ifelse(complete.cases(cluster_robust_se), cluster_robust_se, std.error)
  )

# Rename the variables for better clarity
df_clean <- df_clean %>%
  rename(
    federal_ideology = fed_ideology,
    state_ideology = state_ideology,
    electorate_level = elec_level,
    electorate_ideology = elec_type,
    temporal_control = temporal_control,
    demographic_inclusion = demographics,
    model_effects = model_option,
    state_n_threshold = n_threshold
  )

# Update levels of factors with more descriptive names
df_clean <- df_clean %>%
  mutate(
    federal_ideology = factor(federal_ideology, 
                              levels = c("nominate", "cf", "ada"),
                              labels = c("NOMINATE", 
                                         "CFscores", 
                                         "ADA Scores")),
    state_ideology = factor(state_ideology, 
                            levels = c("npat", "cf"),
                            labels = c("NPAT Scores", 
                                       "CFscores")),
    electorate_level = factor(electorate_level, 
                              levels = c("ntl_", "state_"),
                              labels = c("National Level", 
                                         "State Level")),
    electorate_ideology = factor(electorate_ideology, 
                             levels = c("symbolic", "operational"),
                             labels = c("Symbolic", 
                                        "Operational")),
    temporal_control = factor(temporal_control, 
                              levels = c("none", "trend"),
                              labels = c("No Temporal Control", 
                                         "Time Trend")),
    model_effects = factor(model_effects, 
                        levels = c("random_effect", "none", "state", "year", "both"),
                        labels = c("Random Effects (Party-State-Year)", 
                                   "Pooled OLS", 
                                   "State Fixed Effects", 
                                   "Year Fixed Effects", 
                                   "Both State and Year Effects")),
    state_n_threshold = factor(state_n_threshold,
                               levels = c('10', '30'),
                               labels = c('n >= 10', 'n >= 30')),
    demographics_included = factor(df_clean$demographic_inclusion, levels = c(F,T),
                                   labels = c('No demographic controls', 'Demographic controls'))
  )


###

# 2. Summary stats for main model

df_clean <- df_clean %>%
  mutate(
    t_statistic = estimate / std.error,            # Calculate t-statistic
    p.value = 2 * (1 - pnorm(abs(t_statistic)))    # Two-tailed p-value
  )

# Add a weight column based on the value of electorate_level
df_clean <- df_clean %>%
  mutate(weight = ifelse(electorate_level == "State Level", 0.5, 1))

# Calculate results with reweighted observations
results.tbl <- df_clean %>%
  group_by(change_type, party_level) %>%
  summarise(
    pos_sig = sum((estimate > 0 & p.value < 0.05) * weight),
    neg_sig = sum((estimate < 0 & p.value < 0.05) * weight),
    n = sum(weight),  # Adjust n to account for weights
    pos_pct = 100 * round(sum((estimate > 0 & p.value < 0.05) * weight) / sum(weight), 3),
    neg_pct = 100 * round(sum((estimate < 0 & p.value < 0.05) * weight) / sum(weight), 3),
    avg_coef = weightedMedian(estimate, weight),  # Median with weights
    p10_coef = wtd.quantile(estimate, probs = 0.10, weights = weight)[[1]],  # Weighted 10th percentile
    p90_coef = wtd.quantile(estimate, probs = 0.90, weights = weight)[[1]]   # Weighted 90th percentile
  )


###

# 3. Summary results by choice

# Clarify State Level separation
df_clean$elec_type2 <- as.character(df_clean$electorate_level)
df_clean$elec_type2[df_clean$electorate_level == 'State Level'] <- 
  ifelse(df_clean$threshold[df_clean$electorate_level == 'State Level'] == 10,
         'State Level (min 10)', 'State Level (min 30)')

# Function to calculate weighted medians for each ideology_type for a given column
calculate_weighted_median_by_column <- function(df, column_name) {
  df %>%
    group_by(across(all_of(column_name))) %>%
    summarise(
      weighted_median_federal = weightedMedian(federal, w = weight, na.rm = TRUE),
      weighted_median_state = weightedMedian(state, w = weight, na.rm = TRUE),
      weighted_median_electorate_symbolic = weightedMedian(`electorate-symbolic`, w = weight, na.rm = TRUE),
      weighted_median_electorate_operational = weightedMedian(`electorate-operational`, w = weight, na.rm = TRUE)
    ) %>%
    rename_with(~ paste0("", .), starts_with("weighted_median_")) %>%
    ungroup()
}

# List of columns to calculate weighted medians for
columns_to_summarise <- c("federal_ideology", "state_ideology", "elec_type2", "electorate_ideology", "temporal_control", "demographic_inclusion", "model_effects")

# i. Results for "Can happen"
results_wide_can <- df_clean[df_clean$change_type == 'can', ] %>%
  pivot_wider(
    names_from = party_level,
    values_from = estimate,
    id_cols = c(weight, federal_ideology, state_ideology, elec_type2, electorate_ideology, temporal_control, demographic_inclusion, model_effects, state_n_threshold)
  )

# Applying the weightedMedian function
can_results_list <- lapply(columns_to_summarise, function(column) {
  calculate_weighted_median_by_column(results_wide_can, column)
})

# Combine results into a single dataframe
can_results <- bind_rows(can_results_list, .id = "variable")

# ii. Results for "Does happen"
results_wide_does <- df_clean[df_clean$change_type == 'did',] %>%
  pivot_wider(names_from = party_level, values_from = estimate, 
              id_cols = c(weight, federal_ideology, state_ideology, elec_type2, electorate_ideology, temporal_control, demographic_inclusion, model_effects, state_n_threshold))

does_results_list <- lapply(columns_to_summarise, function(column) {
  calculate_weighted_median_by_column(results_wide_does, column)
})

does_results <- bind_rows(does_results_list, .id = "variable")

# iii. Save results
write.csv(does_results, 'do_choice_coefs.csv')
write.csv(can_results, 'can_choice_coefs.csv')


###

# 4. Specification curves

# Define the combinations of `party_level` and `change_type`
unique_combinations <- expand.grid(
  party_level = c("electorate-operational", "electorate-symbolic", "federal", "state"),
  change_type = c("can", "did")
)

# Create the six dataframes
dataframes_list <- lapply(1:nrow(unique_combinations), function(i) {
  combination <- unique_combinations[i, ]
  df_clean %>%
    filter(party_level == combination$party_level, change_type == combination$change_type) %>%
    select(
      estimate, std.error, federal_ideology, state_ideology, electorate_ideology, 
      elec_type2, temporal_control, demographics_included, model_effects, se_type
    ) %>%
    rename(electorate_type = elec_type2)
})

# Name the dataframes for easy reference
names(dataframes_list) <- apply(unique_combinations, 1, function(row) {
  paste(row["party_level"], row["change_type"], sep = "_")
})


# Define the desired order and labels for facets (excluding SE_type)
facet_order <- c("federal_ideology", "state_ideology", "electorate_type", 
                 "electorate_ideology", "demographics_included", 
                 "model_effects")

facet_labels <- c("Fed", "State", "Elec.", 
                  "Electorate", "Demos", 
                  "Model FX")

dataframes_list <- lapply(dataframes_list, function(df) {
  df %>% select(-temporal_control)
})

# Loop through each dataframe in the list and create/save a specification curve
for (name in names(dataframes_list)) {
  temp2 <- dataframes_list[[name]]  # Select the dataframe
  
  # Step 1: Add a significance category
  spec_curve <- temp2 %>%
    mutate(
      significance = case_when(
        estimate + 1.96 * std.error < 0 ~ "Negative Sig",
        estimate - 1.96 * std.error > 0 ~ "Positive Sig",
        TRUE ~ "Null"
      ),
      significance = factor(significance, levels = c("Negative Sig", "Null", "Positive Sig"))
    ) %>%
    arrange(estimate) %>%
    mutate(model_id = row_number())
  
  # Step 2: Reshape the data for the bottom panel
  facet_data <- spec_curve %>%
    select(model_id, federal_ideology, state_ideology, electorate_type, electorate_ideology, 
           demographics_included, model_effects, significance) %>%
    pivot_longer(cols = c(federal_ideology, state_ideology, electorate_type, 
                          electorate_ideology, demographics_included, 
                          model_effects), 
                 names_to = "feature", 
                 values_to = "value") %>%
    mutate(value = ifelse(is.na(value), "NA", value),
           feature = factor(feature, levels = facet_order, labels = facet_labels))  # Reorder facets
  
  # Step 3: Create the top panel
  plot_top <- ggplot(spec_curve, aes(x = model_id, y = estimate, color = significance)) +
    geom_point(size = 1) +
    geom_errorbar(aes(ymin = estimate - 1.96 * std.error, ymax = estimate + 1.96 * std.error), width = 0.2) +
    scale_color_manual(values = c("Negative Sig" = "red", "Null" = "gray", "Positive Sig" = "blue")) +
    theme_minimal() +
    labs(x = "Model ID", y = "Estimate", color = "Significance") +
    theme(
      axis.text.x = element_blank(),                              # Remove X-axis tick labels
      axis.ticks.x = element_blank(),                             # Remove X-axis ticks
      axis.text.y = element_text(size = 14),                      # Y-axis tick labels
      axis.title.x = element_text(size = 16, face = "bold"),      # X-axis title
      axis.title.y = element_text(size = 16, face = "bold"),      # Y-axis title
      legend.title = element_text(size = 16),                     # Legend title
      legend.text = element_text(size = 14)                       # Legend text
    )
  
  # Step 4: Prepare the bottom panel without facet titles
  plot_bottom <- ggplot(facet_data, aes(x = model_id, y = value, color = significance)) +
    geom_point(size = 1, shape = 124) +
    scale_color_manual(values = c("Negative Sig" = "red", "Null" = "gray", "Positive Sig" = "blue")) +
    theme_minimal() +
    labs(x = "Model ID", y = "", color = "Significance") +
    facet_grid(rows = vars(feature), scales = "free_y", space = "free_y") +  # Allows dynamic height
    theme(
      strip.text = element_blank(),                                 # Remove facet titles
      axis.text.y = element_text(size = 14),                        # Y-axis tick labels
      axis.ticks.y = element_blank(),                               # Remove Y-axis ticks
      axis.title.x = element_text(size = 16, face = "bold"),        # X-axis title
      panel.spacing = unit(0.75, "lines")                           # Slightly increase vertical space between facets
    )
  
  # Step 5: Combine the two panels
  final_plot <- plot_top / plot_bottom
  
  # Save the plot in the specified folder
  save_path <- "Ideological Placement of Parties/"
  plot_file_name <- paste0(save_path, name, "_specification_curve.png")
  ggsave(plot_file_name, final_plot, width = 12, height = 14)  # Adjusted height for better readability
  
  # Print confirmation
  message("Saved specification curve plot for: ", name)
}


